In [8]:
import pandas as pd

# Load the dataset
movies = pd.read_csv(r"C:\Users\lenovo\Downloads\Datasets\Datasets\movies.csv")

# Check the first few rows to understand the structure
movies.head()

# Check the columns to see what data we have
movies.columns
Out[8]:
Index(['title_x', 'imdb_id', 'poster_path', 'wiki_link', 'title_y',
       'original_title', 'is_adult', 'year_of_release', 'runtime', 'genres',
       'imdb_rating', 'imdb_votes', 'story', 'summary', 'tagline', 'actors',
       'wins_nominations', 'release_date'],
      dtype='object')
In [40]:
movies['summary'].head()  
Out[40]:
0     Indian army special forces execute a covert op...
4     Under the 'Evening Shadows'  truth often plays...
5     While fighting crimes against women in Delhi  ...
11    A coming-of-age story based on the lives of st...
15    A dynamic young entrepreneur finds herself loc...
Name: summary, dtype: object
In [6]:
movies.head()
Out[6]:
title_x imdb_id poster_path wiki_link title_y original_title is_adult year_of_release runtime genres imdb_rating imdb_votes story summary tagline actors wins_nominations release_date sentiment
0 Uri: The Surgical Strike tt8291224 https://upload.wikimedia.org/wikipedia/en/thum... https://en.wikipedia.org/wiki/Uri:_The_Surgica... Uri: The Surgical Strike Uri: The Surgical Strike 0 2019 138 Action|Drama|War 8.4 35112 Divided over five chapters the film chronicle... Indian army special forces execute a covert op... NaN Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga... 4 wins 11 January 2019 (USA) -0.221429
1 Battalion 609 tt9472208 NaN https://en.wikipedia.org/wiki/Battalion_609 Battalion 609 Battalion 609 0 2019 131 War 4.1 73 The story revolves around a cricket match betw... The story of Battalion 609 revolves around a c... NaN Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen... NaN 11 January 2019 (India) 0.233333
2 The Accidental Prime Minister (film) tt6986710 https://upload.wikimedia.org/wikipedia/en/thum... https://en.wikipedia.org/wiki/The_Accidental_P... The Accidental Prime Minister The Accidental Prime Minister 0 2019 112 Biography|Drama 6.1 5549 Based on the memoir by Indian policy analyst S... Explores Manmohan Singh's tenure as the Prime ... NaN Anupam Kher|Akshaye Khanna|Aahana Kumra|Atul S... NaN 11 January 2019 (USA) 0.600000
3 Why Cheat India tt8108208 https://upload.wikimedia.org/wikipedia/en/thum... https://en.wikipedia.org/wiki/Why_Cheat_India Why Cheat India Why Cheat India 0 2019 121 Crime|Drama 6.0 1891 The movie focuses on existing malpractices in ... The movie focuses on existing malpractices in ... NaN Emraan Hashmi|Shreya Dhanwanthary|Snighdadeep ... NaN 18 January 2019 (USA) 0.200000
4 Evening Shadows tt6028796 NaN https://en.wikipedia.org/wiki/Evening_Shadows Evening Shadows Evening Shadows 0 2018 102 Drama 7.3 280 While gay rights and marriage equality has bee... Under the 'Evening Shadows' truth often plays... NaN Mona Ambegaonkar|Ananth Narayan Mahadevan|Deva... 17 wins & 1 nomination 11 January 2019 (India) 0.000000
In [13]:
from textblob import TextBlob

# Define a function to get the sentiment polarity
def get_sentiment(text):
    blob = TextBlob(str(text))
    return blob.sentiment.polarity

# Apply the function to the 'description' column
movies['sentiment'] = movies['summary'].apply(get_sentiment)

# Show the updated dataset with sentiment values
movies[['title_x', 'sentiment']].head()
Out[13]:
title_x sentiment
0 Uri: The Surgical Strike -0.221429
1 Battalion 609 0.233333
2 The Accidental Prime Minister (film) 0.600000
3 Why Cheat India 0.200000
4 Evening Shadows 0.000000
In [14]:
import matplotlib.pyplot as plt

# Plotting sentiment distribution
plt.figure(figsize=(10,6))
plt.hist(movies['sentiment'], bins=50, color='skyblue', edgecolor='black')
plt.title('Sentiment Distribution of Movie Descriptions', fontsize=16)
plt.xlabel('Sentiment Score', fontsize=12)
plt.ylabel('Frequency', fontsize=12)
plt.show()
No description has been provided for this image
In [15]:
# Assuming you have a 'rating' column
plt.figure(figsize=(10,6))
plt.scatter(movies['imdb_rating'], movies['sentiment'], alpha=0.6, color='orange')
plt.title('Sentiment vs imdb_rating', fontsize=16)
plt.xlabel('Movie Rating', fontsize=12)
plt.ylabel('Sentiment Score', fontsize=12)
plt.show()
No description has been provided for this image
In [16]:
# Sorting movies by highest sentiment
top_positive_movies = movies.sort_values(by='sentiment', ascending=False).head(10)

# Plotting
plt.figure(figsize=(10,6))
plt.barh(top_positive_movies['title_x'], top_positive_movies['sentiment'], color='green')
plt.title('Top 10 Movies with Positive Sentiment', fontsize=16)
plt.xlabel('Sentiment Score', fontsize=12)
plt.ylabel('Movie Title', fontsize=12)
plt.show()
No description has been provided for this image
In [17]:
# Assuming there's a 'genre' column
genre_sentiment = movies.groupby('genres')['sentiment'].mean().head(20)

# Plotting average sentiment by genre
genre_sentiment.sort_values().plot(kind='barh', figsize=(10,6), color='purple')
plt.title('Average Sentiment by genres', fontsize=16)
plt.xlabel('Average Sentiment', fontsize=12)
plt.ylabel('genres', fontsize=12)
plt.show()
No description has been provided for this image
In [18]:
movies.head(2)
Out[18]:
title_x imdb_id poster_path wiki_link title_y original_title is_adult year_of_release runtime genres imdb_rating imdb_votes story summary tagline actors wins_nominations release_date sentiment
0 Uri: The Surgical Strike tt8291224 https://upload.wikimedia.org/wikipedia/en/thum... https://en.wikipedia.org/wiki/Uri:_The_Surgica... Uri: The Surgical Strike Uri: The Surgical Strike 0 2019 138 Action|Drama|War 8.4 35112 Divided over five chapters the film chronicle... Indian army special forces execute a covert op... NaN Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga... 4 wins 11 January 2019 (USA) -0.221429
1 Battalion 609 tt9472208 NaN https://en.wikipedia.org/wiki/Battalion_609 Battalion 609 Battalion 609 0 2019 131 War 4.1 73 The story revolves around a cricket match betw... The story of Battalion 609 revolves around a c... NaN Vicky Ahuja|Shoaib Ibrahim|Shrikant Kamat|Elen... NaN 11 January 2019 (India) 0.233333
In [19]:
pip install plotly
Requirement already satisfied: plotly in c:\users\lenovo\downloads\snscrape_project\snscrape-env\lib\site-packages (6.0.1)
Requirement already satisfied: narwhals>=1.15.1 in c:\users\lenovo\downloads\snscrape_project\snscrape-env\lib\site-packages (from plotly) (1.36.0)
Requirement already satisfied: packaging in c:\users\lenovo\downloads\snscrape_project\snscrape-env\lib\site-packages (from plotly) (25.0)
Note: you may need to restart the kernel to use updated packages.
[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: C:\Users\lenovo\Downloads\snscrape_project\snscrape-env\Scripts\python.exe -m pip install --upgrade pip
In [32]:
import plotly.express as px

# Drop rows with missing required fields
movies= movies.dropna(subset=['imdb_rating', 'imdb_votes', 'wins_nominations']).head(40)

fig = px.scatter_3d(
    movies,
    x='imdb_rating',
    y='imdb_votes',
    z='wins_nominations',
    color='imdb_votes',
    hover_name='title_x',
    title='Interactive 3D Scatter: imdb_rating vs imdb_votes vs wins_nominations',
    color_continuous_scale='RdBu'
)

fig.show()
In [33]:
movies.head(1)
Out[33]:
title_x imdb_id poster_path wiki_link title_y original_title is_adult year_of_release runtime genres imdb_rating imdb_votes story summary tagline actors wins_nominations release_date
0 Uri: The Surgical Strike tt8291224 https://upload.wikimedia.org/wikipedia/en/thum... https://en.wikipedia.org/wiki/Uri:_The_Surgica... Uri: The Surgical Strike Uri: The Surgical Strike 0 2019 138 Action|Drama|War 8.4 35112 Divided over five chapters the film chronicle... Indian army special forces execute a covert op... NaN Vicky Kaushal|Paresh Rawal|Mohit Raina|Yami Ga... 4 wins 11 January 2019 (USA)
In [34]:
print(movies.columns)
Index(['title_x', 'imdb_id', 'poster_path', 'wiki_link', 'title_y',
       'original_title', 'is_adult', 'year_of_release', 'runtime', 'genres',
       'imdb_rating', 'imdb_votes', 'story', 'summary', 'tagline', 'actors',
       'wins_nominations', 'release_date'],
      dtype='object')
In [35]:
import plotly.graph_objects as go
import pandas as pd

# Filter and clean data
movies_clean = movies.dropna(subset=['imdb_rating', 'wins_nominations', 'year_of_release'])

# Create 3D box plot simulation using scatter for box outlines
fig = go.Figure()

# Box 1: imdb_rating vs year_of_release
fig.add_trace(go.Box(
    y=movies_clean['imdb_rating'],
    x=movies_clean['year_of_release'],
    name='IMDb vs Year',
    boxpoints='outliers',
    marker_color='orange'
))

# Box 2: wins_nominations vs year_of_release
fig.add_trace(go.Box(
    y=movies_clean['wins_nominations'],
    x=movies_clean['year_of_release'],
    name='Wins vs Year',
    boxpoints='outliers',
    marker_color='skyblue'
))

# Box 3: imdb_rating vs wins_nominations
fig.add_trace(go.Box(
    y=movies_clean['imdb_rating'],
    x=movies_clean['wins_nominations'],
    name='IMDb vs Wins',
    boxpoints='outliers',
    marker_color='green'
))

fig.update_layout(
    title='3D-style Box Plot of IMDb, Awards, and Year',
    xaxis_title='X Axis',
    yaxis_title='Y Axis',
    showlegend=True,
    template='plotly_dark'
)

fig.show()
In [37]:
import plotly.express as px
import pandas as pd

# Clean the dataset: ensure all required fields are available
movies_clean = movies.dropna(subset=['title_x', 'imdb_rating', 'wins_nominations', 'year_of_release', 'story']).tail(10)

# Create the 3D scatter plot
fig = px.scatter_3d(
    movies_clean,
    x='imdb_rating',
    y='wins_nominations',
    z='year_of_release',
    color='imdb_rating',  # color intensity based on rating
    hover_data=['title_x', 'story'],  # show title and story on hover
    title='3D Movie Visualization: Rating, Awards, and Release Year',
    labels={
        'imdb_rating': 'IMDb Rating',
        'wins_nominations': 'Wins/Nominations',
        'year_of_release': 'Release Year',
        'title_x': 'Title',
        'story': 'Story'
    },
    color_continuous_scale='Viridis'
)

fig.update_traces(marker=dict(size=5, opacity=0.8))
fig.update_layout(
    scene=dict(
        xaxis_title='IMDb Rating',
        yaxis_title='Wins/Nominations',
        zaxis_title='Year of Release'
    ),
    template='plotly_dark'
)

fig.show()
In [ ]:
 
In [ ]: